home *** CD-ROM | disk | FTP | other *** search
- % A Simple Compiler
- %
- % From Stan Szpakowicz, "Logic Grammars", BYTE, Aug. 1987
- % Written in Prolog using logic grammars
- %
- % Note: In order to execute this program, a Prolog interpreter
- % must support logic grammars, of definite clause grammars
- %
-
- % === main program ===
- compile :-
- set_gensym( "$lbl" ), set_gensym( "$mem" ),
- read_in( Chars ), % (skips initial white space)
- lsym_list( LexSyms, Chars, [] ), % lexical analysis
- program( Tree, LexSyms, [] ), % syntactic analysis
- interm_code( Tree, Code, [] ), % code generation
- write_out( Code ), !.
- compile :- write( 'Sorry' ), nl.
-
- % read in a sequence of characters terminated by a #
- read_in( Chars ) :- get( Ch ), read_in( Ch, Chars ).
-
- read_in( 35, [] ) :- !. % #
- read_in( Ch, [Ch | Chars] ) :- get0( Ch1 ), read_in( Ch1, Chars ).
-
- % print the generated code one instruction per line
- write_out( [] ).
- write_out( [Instr | Instrs] ) :-
- write( Instr ), nl, write_out( Instrs ).
-
- % === scanner ===
- % list of lexical symbols
- lsym_list( [LexSym | LexSyms] ) -->
- lsym( LexSym ), !, opt_space, lsym_list( LexSyms ).
- lsym_list( [] ) --> [].
-
- % one lexical symbol (input tokens are ASCII codes)
- lsym( IdOrKwd ) --> letter( L ), alphanums( Ls ),
- { name( Nm, [L | Ls] ) }, { wrap_name( Nm, IdOrKwd ) }.
- lsym( num( N ) ) --> digit( D ), digits( Ds ),
- { name( N, [D | Ds] ) }.
- lsym( := ) --> [58], [61]. % colon, equals
- lsym( S ) --> [Ch], { name( S, [Ch] ) }.
-
- % optional white space between lexical symbols
- opt_space --> white_space, !, opt_space.
- opt_space --> [].
-
- % recognizing classes of ASCII codes
- letter( L ) --> [L], { is_letter( L ) }.
- digit( D ) --> [D], { is_digit( D ) }.
- white_space --> [Ch], { is_white_space( Ch ) }.
-
- is_letter( Ch ) :- 65 =< Ch, Ch =< 90.
- is_letter( Ch ) :- 97 =< Ch, Ch =< 122.
-
- is_digit( Ch ) :- 48 =< Ch, Ch =< 57.
-
- is_white_space( 32 ). % blank space
- is_white_space( 13 ). % new line (this would be 10 in Quintus Prolog)
- is_white_space( 9 ). % tab
-
- % keywords and identifiers
- alphanums( [L | Ls] ) --> letter( L ), alphanums( Ls ).
- alphanums( [L | Ls] ) --> digit( L ), alphanums( Ls ).
- alphanums( [] ) --> [].
-
- wrap_name( Nm, Nm ) :- is_keyword( Nm ).
- wrap_name( Nm, id( Nm ) ).
-
- % table of keywords
- is_keyword( if ). is_keyword( then ). is_keyword( fi ).
- is_keyword( while ). is_keyword( do ). is_keyword( od ).
- is_keyword( skip ). is_keyword( not ).
-
- % integers
- digits( [D | Ds] ) --> digit( D ), digits( Ds ).
- digits( [] ) --> [].
-
- % === parser ===
- program( s( Stmt, Stmts) ) -->
- statement( Stmt ), [';'],
- statements( Stmts ).
-
- statements( s( Stmt, Stmts) ) -->
- statement( Stmt ), [';'], !,
- statements( Stmts ).
- statements( skip ) --> [].
- % a sequence of statements is represented as a nested term,
- % for example s( Stmt1, s( Stmt2, s( Stmt3, skip ) ) ),
- % where Stmt1, Stmt2, Stmt3 represent individual statements
-
- statement( skip ) --> [skip].
- statement( let( V, E ) ) --> [id( V )], [:=], expr( E ).
- statement( if( C, Stmts ) ) -->
- [if], condition( C ), [then], statements( Stmts ), [fi].
- statement( while( C, Stmts ) ) -->
- [while], condition( C ), [do], statements( Stmts ), [od].
-
- condition( not( C ) ) --> [not], relation( C ).
- condition( C ) --> relation( C ).
-
- relation( cond( Op, E1, E2 ) ) --> expr( E1), comp_op( Op ), expr( E2 ).
-
- comp_op( '=' ) --> ['='].
- comp_op( '<' ) --> ['<'].
-
- expr( E ) --> add_expr( AE ), rest_expr( AE, E ).
-
- rest_expr( AE1, E ) -->
- ['+'], add_expr( AE2 ), rest_expr( e( '+', AE1, AE2 ), E ).
- rest_expr( AE1, E ) -->
- ['-'], add_expr( AE2 ), rest_expr( e( '-', AE1, AE2 ), E ).
- rest_expr( E, E ) --> [].
-
- add_expr( AE ) --> mult_expr( ME ), rest_add_expr( ME, AE ).
-
- rest_add_expr( ME1, AE ) -->
- ['*'], mult_expr( ME2 ), rest_add_expr( e( '*', ME1, ME2 ), AE ).
- rest_add_expr( ME1, AE ) -->
- ['/'], mult_expr( ME2 ), rest_add_expr( e( '/', ME1, ME2 ), AE ).
- rest_add_expr( E, E ) --> [].
-
- mult_expr( var( V ) ) --> [id( V )].
- mult_expr( num( N ) ) --> [num( N )].
- mult_expr( E ) --> ['('], expr( E ), [')'].
-
- % === code generation ===
- % statements
- interm_code( s( Stmt, Stmts ) ) -->
- interm_code( Stmt ), interm_code( Stmts ).
- interm_code( skip ) --> [].
- interm_code( let( V, E ) ) -->
- expr_interm_code( E ), [store( V )].
- interm_code( if( C, Stmts ) ) -->
- { newlabel( L ) },
- cond_interm_code( not( C ) ),
- [jmp_cond( L )],
- interm_code( Stmts ),
- [label( L )].
- interm_code( while( C, Stmts ) ) -->
- { newlabel( L1 ) }, { newlabel( L2 ) },
- [label( L1 )],
- cond_interm_code( not( C ) ),
- [jmp_cond( L2 )],
- interm_code( Stmts ),
- [jmp( L1 )], [label( L2 )].
-
- % conditions
- cond_interm_code( not( not( C ) ) ) --> cond_interm_code( C ).
- cond_interm_code( not( R ) ) -->
- rel_interm_code( R ), [flip].
- % flip: negate the contents of the condition register
- cond_interm_code( R ) -->
- rel_interm_code( R ).
-
- % relations
- rel_interm_code( cond( Op, E1, E2 ) ) -->
- expr_interm_code( E2 ), { newmemloc( M ) }, [store( M )],
- expr_interm_code( E1 ), [sub( M )], tst_interm_code( Op ).
-
- % set the condition register
- tst_interm_code( '=' ) --> [tst_zer].
- tst_interm_code( '<' ) --> [tst_neg].
-
- % expressions
- expr_interm_code( e( Op, E1, E2 ) ) -->
- expr_interm_code( E2 ), { newmemloc( M ) }, [store( M )],
- expr_interm_code( E1 ), eop_interm_code( Op, M ).
- expr_interm_code( var( V ) ) -->
- [load( V )].
- % load a constant
- expr_interm_code( num( N ) ) -->
- [loadc( N )].
-
- eop_interm_code( '+', M ) --> [add( M )].
- eop_interm_code( '-', M ) --> [sub( M )].
- eop_interm_code( '*', M ) --> [mul( M )].
- eop_interm_code( '/', M ) --> [div( M )].
-
- % auxiliaries
- newlabel( L ) :-
- gensym( "$lbl", L ).
- newmemloc( M ) :-
- gensym( "$mem", M ).
-
- % === utilities ===
- % symbol generator (preset in the main program)
- set_gensym( Pref ) :-
- retract( sym( Pref, _ ) ), fail.
- set_gensym( Pref ) :-
- assert( sym( Pref, 1 ) ).
-
- gensym( Pref, Sym ) :-
- retract( sym( Pref, Num ) ),
- Num1 is Num + 1,
- assert( sym( Pref, Num1 ) ),
- glue( Pref, Num, Sym ).
-
- glue( Pref, Num, Sym ) :-
- name( Num, Digits ), append( Pref, Digits, All ),
- name( Sym, All ), !.
-
- % well, you can't have a program without append...
- append( [], Z, Z ).
- append( [A | X], Y, [A | Z] ) :- append( X, Y, Z ).
-
- % end of program
-